#---------------------------------------------------------
# Are Intermediary Constraints Priced?
# Du, Hébert, Huber
# The Review of Financial Studies 2022
#---------------------------------------------------------

#---------------------------------------------------------
# Calculate returns to trading portfolios of single-currency 1M-forward 3M Forward Arbitrage
# (1) Return based on only days where returns in all constituent currencies are available
# (2) Carry portfolios (top 5 basis) rebalanced monthly
# (3) Include GMM FACTORS of various versions, which are weights applied to basis (instead of returns)
# NOTE: Monthly rebalance date is the last day of the PREVIOUS month, applicable to Top5 Basis.
#       All trades in the month have the same weight.
# NOTE: Equal weighted portfolios re-adjust weight denominator depending on number of available currencies.
#---------------------------------------------------------

#---------------------------------------------------------
# Set-up
#---------------------------------------------------------

# Resolve `script_path` (root folder of the replication package) and start
# from a workspace that contains nothing else. Order of precedence:
#   1. a `script_path` that already exists (e.g. set by a calling script),
#   2. the first trailing command-line argument,
#   3. the default Dropbox location.
# The rest of the script appends relative paths with paste0(), so the value
# is expected to end with a path separator.
if (!exists('script_path')) {
  # TRUE is spelled out: T is an ordinary variable and this call runs before
  # the workspace is cleared, so a reassigned T would change the result.
  args <- commandArgs(trailingOnly = TRUE)
  script_path <- if (length(args) > 0) {
    args[1]
  } else {
    '~/Dropbox/ForwardArbitrage/CodeReplication/'
  }
}

# Remove every other object (including `args`) so later code cannot pick up stale values
rm(list = setdiff(ls(), 'script_path'))

# Inputs: sourced helper code, the first-stage .RData file and the PC portfolio csv.
# NOTE(review): basis_and_returns, single_table, ois_table and ibor_table are not created
# in this script, so they presumably come from the sourced file or the .RData file - confirm.
source(paste0(script_path, 'RCode/ArbFunc.R'))
load(paste0(script_path, 'OutputInterim/Basis_and_Returns_1st.RData'))
basis_pc <- fread(paste0(script_path, 'OutputInterim/PCports.csv'))

# Initialization
# Column-name stems '<fund_currency>_<invest_currency>' for each currency universe
currency_names_single <- paste(single_table$fund_currency, single_table$invest_currency, sep = '_')
currency_names_ois <- paste(ois_table$fund_currency, ois_table$invest_currency, sep = '_')
currency_names_ibor <- paste(ibor_table$fund_currency, ibor_table$invest_currency, sep = '_')

# Single-currency universes: drop entries whose first leg (characters 1-3) is CHF / CAD
currency_single_sans_CHF <- currency_names_single[!startsWith(currency_names_single, 'CHF')]
currency_single_sans_CAD <- currency_names_single[!startsWith(currency_names_single, 'CAD')]

# Pair universes: drop entries whose second leg (characters 5-7) is CHF / CAD
currency_ois_sans_CHF <- currency_names_ois[substring(currency_names_ois, 5, 7) != 'CHF']
currency_ibor_sans_CAD <- currency_names_ibor[substring(currency_names_ibor, 5, 7) != 'CAD']

# Portfolio weighting schemes, in the order in which they are referenced below
weighting_scheme_names <- c('classic_carry', 'single_3v3', 'equal_dollar', 'equal_pairs', 'top5_basis')

# Columns that must be populated on a date for a scheme's return to be computed,
# stored per weighting scheme as a data frame with one column per rate ('ois', 'ibor').
# CHF OIS missing from 2018 on; CAD IBOR missing 2016-2018, hence the *_sans_* lists.
# The OIS and IBOR pair lists can differ in length, so the shorter one is padded with NA
# to fit both into one data frame; the code reading these columns drops the NA entries.
# For pairs, the consuming loops additionally apply a minimum-count guard.

# Append NA to `x` until it has length `n` (no-op when `x` is already that long)
pad_with_na <- function(x, n) {
  c(x, rep(NA, max(0, n - length(x))))
}

n_pairs_to_check <- max(length(currency_ois_sans_CHF), length(currency_ibor_sans_CAD))

single_to_check <- data.frame(ois = currency_single_sans_CHF,
                              ibor = currency_single_sans_CAD,
                              stringsAsFactors = FALSE)
pairs_to_check <- data.frame(ois = pad_with_na(currency_ois_sans_CHF, n_pairs_to_check),
                             ibor = pad_with_na(currency_ibor_sans_CAD, n_pairs_to_check),
                             stringsAsFactors = FALSE)

currency_to_check <- list('classic_carry' = data.frame(ois = c('AUD', 'JPY'),
                                                       ibor = c('AUD', 'JPY'),
                                                       stringsAsFactors = FALSE),
                          'single_3v3' = single_to_check,
                          'equal_dollar' = single_to_check,
                          'equal_pairs' = pairs_to_check,
                          'top5_basis' = pairs_to_check)

#---------------------------------------------------------
# Construct portfolios - OIS, IBOR
#---------------------------------------------------------

# Equal portfolios (single_3v3, equal_dollar, equal_pairs)
# For every scheme, rate and return tenor this adds to basis_and_returns:
#   portfolio_<rate>_log_return_<scheme>_<tenor> : equal-weighted log return
#   factor_<rate>_spot_3M_<scheme>               : the same weights applied to the 3M spot log basis
# Dates on which any column listed in currency_to_check is missing are left NA.
# The factor column name carries no tenor, so every tenor overwrites it and the
# values computed for the last tenor in return_tenor_list are the ones that remain.

# Row-by-row mean over the non-missing entries of columns `cols` of data.table `dt`
row_mean_available <- function(dt, cols) {
  apply(dt[, cols, with = FALSE], 1, function(x) sum(x, na.rm = TRUE) / sum(!is.na(x)))
}

for (scheme in intersect(weighting_scheme_names, c('single_3v3', 'equal_dollar', 'equal_pairs'))) {
  
  for (rate in c('ois', 'ibor')) {
    # single_3v3 and equal_dollar use the single-currency universe, equal_pairs the pair universe
    uses_single_names <- scheme %in% c('equal_dollar', 'single_3v3')
    if (rate == 'ois') {
      return_tenor_list <- c('1M_fwd_1M', '1M_fwd_3M', '3M_fwd_3M')
      currency_names <- if (uses_single_names) currency_single_sans_CHF else currency_ois_sans_CHF
    } else {
      return_tenor_list <- c('1M_fwd_3M', '3M_fwd_3M')
      currency_names <- if (uses_single_names) currency_single_sans_CAD else currency_ibor_sans_CAD
    }
    
    # Long and short legs: single_3v3 is long one group of currencies against USD and short
    # another; the other schemes are a plain average over all currencies (no short leg)
    if (scheme == 'single_3v3') {
      if (rate == 'ois') {
        long_leg <- paste(c('AUD', 'CAD', 'GBP'), 'USD', sep = '_')
        short_leg <- paste(c('EUR', 'JPY'), 'USD', sep = '_')
      } else {
        long_leg <- paste(c('AUD', 'GBP'), 'USD', sep = '_')
        short_leg <- paste(c('CHF', 'EUR', 'JPY'), 'USD', sep = '_')
      }
    } else {
      long_leg <- currency_names
      short_leg <- character(0)
    }
    
    # Include 3M basis to make sure that returns are only calculated on days when there are spot 3M
    basis_suffix <- paste(rate, 'spot_log_basis_3M', sep = '_')
    basis_names <- paste(currency_names, basis_suffix, sep = '_')
    
    for (return_tenor in return_tenor_list) {
      return_suffix <- paste(rate, 'log_return', return_tenor, sep = '_')
      return_names <- paste(currency_names, return_suffix, sep = '_')
      
      # Temp data table with only relevant information (selecting the needed columns
      # avoids deep-copying the whole panel on every iteration)
      keep_cols <- c('Date', basis_names, return_names)
      full_temp <- basis_and_returns[, names(basis_and_returns)[names(basis_and_returns) %in% keep_cols], with = FALSE]
      
      # Keep only days with full info in every checked basis and return column
      values_to_check <- currency_to_check[[scheme]][, rate]
      values_to_check <- values_to_check[!is.na(values_to_check)]
      columns_to_check <- c(paste(values_to_check, basis_suffix, sep = '_'), 
                            paste(values_to_check, return_suffix, sep = '_'))
      # A row without NA has exactly length(columns_to_check) non-missing entries, so the
      # minimum-count guard (more than 5 populated entries) reduces to a test on that length.
      # NOTE(review): the count covers basis AND return columns together, not currencies -
      # confirm this is the intended "at least 5" check.
      has_full_info <- complete.cases(full_temp[, columns_to_check, with = FALSE]) & length(columns_to_check) > 5
      temp <- full_temp[has_full_info]
      
      # Calculate average over available days: long leg minus short leg (if any)
      portfolio_return <- row_mean_available(temp, paste(long_leg, return_suffix, sep = '_'))
      # Basis factor
      basis_factor <- row_mean_available(temp, paste(long_leg, basis_suffix, sep = '_'))
      if (length(short_leg) > 0) {
        portfolio_return <- portfolio_return - row_mean_available(temp, paste(short_leg, return_suffix, sep = '_'))
        basis_factor <- basis_factor - row_mean_available(temp, paste(short_leg, basis_suffix, sep = '_'))
      }
      
      # Write back by date; dates that were filtered out above have no match and become NA
      row_in_temp <- match(basis_and_returns$Date, temp$Date)
      basis_and_returns[, (paste('portfolio', rate, 'log_return', scheme, return_tenor, sep = '_')) := 
                          portfolio_return[row_in_temp]]
      basis_and_returns[, (paste('factor', rate, 'spot_3M', scheme, sep = '_')) :=
                          basis_factor[row_in_temp]]
    }
  }
}

# Carry portfolios - need to first calculate weights
# For every rate and return tenor this adds to basis_and_returns:
#   portfolio_<rate>_log_return_top5_basis_<tenor>    : weighted sum of pair returns, with weights
#                                                       from compute_top_weight() taken at the rebalance date
#   factor_<rate>_spot_3M_top5_basis                  : the same weights applied to the 3M spot log basis
#                                                       (no tenor in the name, so the last tenor's values remain)
#   portfolio_<rate>_log_return_classic_carry_<tenor> : the AUD_JPY return, NA where the AUD_JPY 3M basis is NA
for (rate in c('ois', 'ibor')) {
  if (rate == 'ois') {
    currency_names <- currency_ois_sans_CHF
    return_tenor_list <- c('1M_fwd_1M', '1M_fwd_3M', '3M_fwd_3M')
  } else {
    currency_names <- currency_ibor_sans_CAD
    return_tenor_list <- c('1M_fwd_3M', '3M_fwd_3M')
  }
  
  # Include 3M basis to make sure that returns are only calculated on days when there are spot 3M
  basis_names <- paste(currency_names, rate, 'spot_log_basis_3M', sep = '_')
  weight_names <- paste(currency_names, 'weight', sep = '_')
  
  for (return_tenor in return_tenor_list) {
    # Temp data table with only relevant information (selecting the needed columns
    # avoids deep-copying the whole panel on every iteration)
    return_names <- paste(currency_names, rate, 'log_return', return_tenor, sep = '_')
    keep_cols <- c('Date', basis_names, return_names)
    full_temp <- basis_and_returns[, names(basis_and_returns)[names(basis_and_returns) %in% keep_cols], with = FALSE]
    
    # `w` is kept as the loop variable in case sourced helper code reads it from the workspace
    for (w in which(weighting_scheme_names == 'top5_basis')) {
      scheme <- weighting_scheme_names[w]
      
      # Keep only days with full info in every checked basis and return column
      values_to_check <- currency_to_check[[scheme]][, rate]
      values_to_check <- values_to_check[!is.na(values_to_check)]
      columns_to_check <- c(paste(values_to_check, rate, 'spot_log_basis_3M', sep = '_'), 
                            paste(values_to_check, rate, 'log_return', return_tenor, sep = '_'))
      # A row without NA has exactly length(columns_to_check) non-missing entries, so the
      # minimum-count guard (more than 5 populated entries) reduces to a test on that length.
      # NOTE(review): the count covers basis AND return columns together, not currencies -
      # confirm this is the intended "at least 5" check.
      has_full_info <- complete.cases(full_temp[, columns_to_check, with = FALSE]) & length(columns_to_check) > 5
      temp <- full_temp[has_full_info]
      
      # Calculate weights
      weights <- compute_top_weight(portfolio_data = temp, 
                                    type_of_reference = 'basis',
                                    top_number = 5,
                                    currency_names = currency_names,
                                    currency_values = basis_names, 
                                    interest_reference = NULL)
      
      # Determine rebalance date and merge in weights; the first date is month start, all the others are last date of the previous month
      temp[, month_end := max(Date), by = floor_date(Date, unit = 'month')]
      month_ends <- unique(temp$month_end)
      # Earliest date of the first month, taken from the unfiltered table.
      # NOTE(review): if that date was dropped by the full-info filter and `weights` only covers
      # dates in `temp`, the inner merge below discards the whole first month - confirm.
      first_weight <- full_temp[year(Date) == year(month_ends[1]) & month(Date) == month(month_ends[1]), min(Date)]
      # Built directly from date vectors so the Date class is never stripped and no
      # numeric-to-Date conversion (which needs an explicit origin on older R) is required
      rebalance_lookup <- data.table(month_end = month_ends,
                                     rebalance_date_in_month = as.Date(c(first_weight, head(month_ends, -1))))
      temp[, rebalance_date_in_month := rebalance_lookup$rebalance_date_in_month[match(month_end, rebalance_lookup$month_end)]]
      
      # Inner merge: every day of a month receives the weights observed on that month's rebalance date
      temp <- merge(temp, weights[, c('Date', weight_names), with = FALSE],
                    by.x = 'rebalance_date_in_month', by.y = 'Date')
      # return_names, basis_names and weight_names all follow the order of currency_names,
      # so the element-wise products pair each currency with its own weight
      weight_values <- temp[, weight_names, with = FALSE]
      portfolio_return <- apply(temp[, return_names, with = FALSE] * weight_values,
                                1, function(x) sum(x, na.rm = TRUE))
      
      # Write back by date; dates without a computed value have no match and become NA
      row_in_temp <- match(basis_and_returns$Date, temp$Date)
      basis_and_returns[, (paste('portfolio', rate, 'log_return', scheme, return_tenor, sep = '_')) := 
                          portfolio_return[row_in_temp]]
      
      # Basis factor
      basis_factor <- apply(temp[, basis_names, with = FALSE] * weight_values,
                            1, function(x) sum(x, na.rm = TRUE))
      basis_and_returns[, (paste('factor', rate, 'spot_3M', scheme, sep = '_')) :=
                          basis_factor[row_in_temp]]
      
    }
    # Classic carry
    carry_col <- paste('portfolio', rate, 'log_return_classic_carry', return_tenor, sep = '_')
    basis_and_returns[, (carry_col) := get(paste('AUD_JPY', rate, 'log_return', return_tenor, sep = '_'))] 
    # Blank out days without a spot 3M basis, consistent with the other portfolios
    basis_and_returns[is.na(get(paste('AUD_JPY', rate, 'spot_log_basis_3M', sep = '_'))), (carry_col) := NA]
  }
}

# Other factors: copy the 3M spot log basis of one currency pair into a factor column,
# per rate (classic_carry <- AUD_JPY, usd_jpy <- USD_JPY)
factor_sources <- c(classic_carry = 'AUD_JPY', usd_jpy = 'USD_JPY')
for (rate in c('ois', 'ibor')) {
  for (factor_label in names(factor_sources)) {
    factor_col <- paste('factor', rate, 'spot_3M', factor_label, sep = '_')
    source_col <- paste(factor_sources[[factor_label]], rate, 'spot_log_basis_3M', sep = '_')
    basis_and_returns[, (factor_col) := get(source_col)]
  }
}

#---------------------------------------------------------
# Merge in PC of returns
#---------------------------------------------------------
# Parse the day-month-year dates of the PC file, then look up each panel date once
basis_pc[, Date := dmy(Date)]
pc_row <- match(basis_and_returns$Date, basis_pc$Date)

# Tenors available per rate, and the PC file's column prefix for each portfolio label
pc_tenors <- list(ois = c('1M_fwd_1M', '1M_fwd_3M', '3M_fwd_3M'),
                  ibor = c('1M_fwd_3M', '3M_fwd_3M'))
pc_prefixes <- c(all = 'pc1ret', top6 = 'pc1top6')

# Columns are created in the order rate -> label -> tenor; panel dates absent from the PC file get NA
for (pc_rate in names(pc_tenors)) {
  for (pc_label in names(pc_prefixes)) {
    for (pc_tenor in pc_tenors[[pc_rate]]) {
      pc_target <- paste('portfolio', pc_rate, 'log_return', pc_label, 'pc', pc_tenor, sep = '_')
      pc_source <- paste(pc_prefixes[[pc_label]], pc_rate, 'stand', pc_tenor, sep = '_')
      basis_and_returns[, (pc_target) := basis_pc[[pc_source]][pc_row]]
    }
  }
}

## Save (with the full column names)
save(basis_and_returns, file = paste0(script_path, 'OutputInterim/Basis_and_Returns.RData'))

## Shorten name for Stata
# Each pattern's first occurrence in a column name is replaced by its abbreviation;
# names that do not contain the pattern are returned unchanged by sub()
stata_abbreviations <- c(log_return = 'r',
                         future_log_basis = 'fut_log_b',
                         portfolio = 'p',
                         factor = 'f')
for (long_form in names(stata_abbreviations)) {
  names(basis_and_returns) <- sub(long_form, stata_abbreviations[[long_form]], names(basis_and_returns))
}

write_dta(basis_and_returns, paste0(script_path, 'OutputInterim/Basis_and_Returns.dta'))

